# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class ChinaSpider(CrawlSpider):
    """Scrape articles linked from the digi.china.com "yaowen" listing page.

    The crawl starts at ``start_urls``; ``rules`` selects article links from
    the listing container and sends each article page to ``parse_item``.
    """

    name = 'china'
    allowed_domains = ['china.com']
    start_urls = ['https://digi.china.com/yaowen']

    rules = (
        # Only links inside the listing container whose URL looks like
        # https://digi.china.com/digi/<digits>/<anything>.html are extracted.
        # The dots in the host name are escaped so they match literally
        # rather than matching any character.
        Rule(
            LinkExtractor(
                allow=r'https://digi\.china\.com/digi/\d+/.*\.html',
                restrict_xpaths='//div[@id="left-slide"]//div[@class="con_item"]',
            ),
            callback='parse_item',
        ),
        # Pagination is currently disabled, so only the first listing page is
        # crawled. To enable it, uncomment the rule below; the quoted literal
        # is the Chinese label of the "next page" link.
        # Rule(LinkExtractor(restrict_xpaths='//div[@id="pageStyle"]//a[contains(., "下一页")]')),
    )

    def parse_item(self, response):
        """Build one item from an article page.

        Args:
            response: The Scrapy response for a page matched by ``rules``.

        Yields:
            dict: An item with the keys ``title`` (first text node of the
            article heading, or ``None`` when the heading is missing),
            ``url`` (the URL of the response) and ``text`` (all text nodes of
            the article body concatenated, with surrounding whitespace
            stripped).
        """
        item = {}
        item['title'] = response.xpath('//div[@class="conR"]/h2/text()').extract_first()
        item['url'] = response.url
        body_fragments = response.xpath(
            '//div[@class="conR"]/div[@class="conR_txt"]//text()'
        ).extract()
        item['text'] = "".join(body_fragments).strip()
        # Yield the item instead of printing it so that Scrapy passes it on to
        # the item pipelines and feed exports.
        yield item
